Graphs

Relationship between Educational Attainment and GDP per Capita

Relationship between Educational Attainment and GDP per Capita by Continent

This version of Shiny is designed to work with 'htmlwidgets' >= 1.5.
    Please upgrade via install.packages('htmlwidgets').

Relationship between Educational Attainment and Child Mortality Rate by Continent

Relationship between Educational Attainment and Number of Workers in Family

Timeline Graph 1

primary_school %>%
  group_by(continent, year) %>%
  summarize(average = mean(primary_school)) %>%
  ggplot(aes(x=year,y=average, color=continent)) + 
  geom_point() + 
  geom_line() + 
  labs(x="Year", y="Average Rate of Educational Attainment", 
       title = "Educational Attainment over Time by Continent")

Map of World Educational Attainment

world <- primary_school %>%
  distinct(country)

map.world <- map_data("world")

world %>% 
  ggplot() +
  geom_map(data = map.world, map = map.world, 
           aes(map_id = region), fill = "white", color = "black") + 
  geom_map(aes(map_id = country), map = map.world) +
  expand_limits(x = map.world$long, y = map.world$lat) + # scale for fill
  theme_map() 






worldmap <- get_stamenmap(
    bbox = c(left = -180, bottom = -57, right = 179, top = 82.1), 
    maptype = "toner-background",
    zoom = 2
)
Source : http://tile.stamen.com/toner-background/2/0/0.png
Source : http://tile.stamen.com/toner-background/2/1/0.png
Source : http://tile.stamen.com/toner-background/2/2/0.png
Source : http://tile.stamen.com/toner-background/2/3/0.png
Source : http://tile.stamen.com/toner-background/2/0/1.png
Source : http://tile.stamen.com/toner-background/2/1/1.png
Source : http://tile.stamen.com/toner-background/2/2/1.png
Source : http://tile.stamen.com/toner-background/2/3/1.png
Source : http://tile.stamen.com/toner-background/2/0/2.png
Source : http://tile.stamen.com/toner-background/2/1/2.png
Source : http://tile.stamen.com/toner-background/2/2/2.png
Source : http://tile.stamen.com/toner-background/2/3/2.png
ggmap(worldmap)

primary_school %>%
  filter(decade == 1990) %>%
  group_by(decade, country) %>%
  summarize(average = mean(child_mortality)) %>%
  arrange(desc(average)) %>%
  filter(country==c("Niger", "Costa Rica")) 
longer object length is not a multiple of shorter object length

Tables

Mean Educational Attainment by Continent

primary_school <- primary_school %>%
  mutate(high_gdp = gdp_capita > 3955) 
# scatterplots to find interactions between variables
ggplot(primary_school, aes(x=gdp_capita, y=unemployment, color=high_gdp)) + geom_point()



# linear regression
library(broom)
tidy(lm(primary_school ~ high_gdp + unemployment, data=primary_school))

tidy(lm(primary_school ~ gdp_capita + unemployment + child_mortality, data=primary_school))

tidy(lm(primary_school ~ gdp_capita + unemployment + rural_pop, data=primary_school))

# an interaction effect will let one variable affect the other. Doesn't mean that the two variables are related
# coeff 0 -> no relationship
# less variables to fit b/c NA

country_list <- primary_school %>% 
  select(1:1) %>%
  distinct() 

save(country_list,file="country_list.Rda") 
  

## Map of World Educational Attainment
world <- primary_school %>%
  filter(year==c("2014","2015")) %>% 
  distinct(country)

map.world <- map_data("world")

world %>% 
  ggplot() +
  geom_map(data = map.world, map = map.world, 
           aes(map_id = region), fill = "white", color = "black") + 
  geom_map(aes(map_id = country), map = map.world) +
  expand_limits(x = map.world$long, y = map.world$lat) + # scale for fill
  theme_map() 






worldmap <- get_stamenmap(
    bbox = c(left = -180, bottom = -57, right = 179, top = 82.1), 
    maptype = "toner-background",
    zoom = 2
)

ggmap(worldmap)

primary_school %>%
  ggplot(aes(x=year, y=primary_school, color=high_gdp)) + 
  geom_jitter() +
  facet_wrap(~ high_gdp) +
  geom_smooth(se=FALSE)

plot <- ggplot(primary_school, aes(x=gdp_capita, y=primary_school, color=high_gdp)) + 
  geom_point() + 
  geom_smooth(method = "lm", se = FALSE) +
  labs(x="GDP per capita", y="Educational Attainment (Primary School)", 
       title="Educational Attainment and GDP per Capita by Continent")

ggplotly(plot)

NA

##Schooling Cost Graph

ggplot(primary_school,aes(x=schooling_cost,y=primary_school, color=high_gdp))+
  geom_point()+
  geom_smooth(method="lm", se=FALSE)

Gini

---
title: "Data Science Final Project"
author: "Blair Cha, Kaarin Khandelwal, Dylan Larsen"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---

```{r setup, include=FALSE}
# Default every later chunk to echo = TRUE; chunks that pass echo=FALSE in
# their own header override this.
knitr::opts_chunk$set(echo = TRUE)
```

```{r, warning=FALSE, echo=FALSE, message = FALSE}
library(readxl)
library(dplyr)
library(tidyverse)
library(lubridate)
library(countrycode)
library(stringr)
library(wesanderson)
library(plotly)
library(ggmap)
library(ggthemes)
# Read each indicator workbook into its own data frame. The "0" suffix marks
# the table as read from disk, before any reshaping.
primary_school0 <- read_excel(path = "primary_school.xlsx")
urban_poverty0 <- read_excel(path = "urban_poverty.xlsx")
unemployment0 <- read_excel(path = "unemployment.xlsx")
gdp_capita0 <- read_excel(path = "gdp_capita.xlsx")
child_mortality0 <- read_excel(path = "child_mortality.xlsx")
family_workers0 <- read_excel(path = "family_workers.xlsx")
rural_pop0 <- read_excel(path = "rural_pop.xlsx")
schooling_cost0 <- read_excel(path = "schooling_cost.xlsx")
```

```{r, warning=FALSE, echo=FALSE}
# Reassembling each dataset to longer style

# Reshape one indicator table into long format.
#
# wide:       data frame with a `country` column; every other column is
#             pivoted, and its column name is stored in `year`.
# value_name: string naming the column that receives the pivoted values.
#
# Returns the long table with `year` converted to numeric.
to_long <- function(wide, value_name) {
  wide %>%
    pivot_longer(cols = -country,
                 names_to = "year",
                 values_to = value_name) %>%
    mutate(year = as.numeric(year))
}

# One long table per indicator; the value column carries the indicator's name
# so the tables can be joined on country and year without name clashes.
primary_school <- to_long(primary_school0, "primary_school")
urban_poverty <- to_long(urban_poverty0, "urban_poverty")
unemployment <- to_long(unemployment0, "unemployment")
gdp_capita <- to_long(gdp_capita0, "gdp_capita")
child_mortality <- to_long(child_mortality0, "child_mortality")
family_workers <- to_long(family_workers0, "family_workers")
rural_pop <- to_long(rural_pop0, "rural_pop")
schooling_cost <- to_long(schooling_cost0, "schooling_cost")
```

```{r, warning=FALSE, echo=FALSE}
# Joining every dataset to primary_school

# Fold the indicator tables into one, left to right, matching on year and
# country at each step. inner_join keeps only the year/country pairs that
# appear in every table.
primary_school1 <- purrr::reduce(
  list(
    primary_school,
    urban_poverty,
    unemployment,
    gdp_capita,
    child_mortality,
    family_workers,
    rural_pop,
    schooling_cost
  ),
  function(joined, indicator) {
    inner_join(joined, indicator, by = c("year", "country"))
  }
)
```

```{r, warning=FALSE, echo=FALSE}
# Adding continent variable

# Look up a continent for every country name in the long primary_school table,
# then collapse the result to one row per (continent, country) pair.
newdata <- data.frame(country = primary_school$country)
newdata$continent <- countrycode(sourcevar = primary_school$country,
                                 origin = "country.name",
                                 destination = "continent")
newdata1 <- newdata %>%
  select(continent, country) %>%
  distinct()

# From here on `primary_school` is the joined table (all indicators) with
# `continent` and `decade` added; rows without a primary_school value are
# dropped.
primary_school <- primary_school1 %>%
  inner_join(newdata1, by = "country") %>%
  mutate(decade = floor(year / 10) * 10) %>%
  drop_na(primary_school)

# View() opens an interactive data viewer, so skip it when the document is
# rendered non-interactively.
if (interactive()) {
  View(primary_school)
}

# Suggestions: 1) make some variables categorical (category can be "missing value") 2) don't use variables with too many NAs 3) select range of years 4) find other new variables 
```

```{r, warning=FALSE, echo=FALSE}
# Write the assembled table to disk under its own name; restore it in another
# session with load("primary_school.Rda").
save(list = "primary_school", file = "primary_school.Rda")
```

# Graphs

## Relationship between Educational Attainment and GDP per Capita


## Relationship between Educational Attainment and GDP per Capita by Continent
```{r, warning=FALSE, echo=FALSE}
# Scatter of attainment against GDP per capita, coloured by continent, with
# one straight-line fit (no confidence band) per continent.
plot1 <- primary_school %>%
  ggplot(aes(x = gdp_capita, y = primary_school, color = continent)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("GDP per capita") +
  ylab("Educational Attainment (Primary School)") +
  ggtitle("Educational Attainment and GDP per Capita by Continent")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot1)
```

## Relationship between Educational Attainment and Child Mortality Rate by Continent
```{r, warning=FALSE, echo=FALSE}
# Scatter of attainment against child mortality, coloured by continent using
# five colours from the "Moonrise3" Wes Anderson palette, with one
# straight-line fit (no confidence band) per continent.
plot2 <- primary_school %>%
  ggplot(aes(x = child_mortality, y = primary_school, color = continent)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_manual(values = wes_palette("Moonrise3", n = 5)) +
  xlab("Child Mortality Rate") +
  ylab("Educational Attainment (Primary School)") +
  ggtitle("Educational Attainment and Child Mortality Rate by Continent")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot2)
```

## Relationship between Educational Attainment and Number of Workers in Family
```{r, warning=FALSE, echo=FALSE}
# Scatter of attainment against family workers, with points shaded from red
# (low) to blue (high) GDP per capita.
# The y aesthetic is primary_school so that the plotted variable matches the
# axis label, the title and the section heading.
plot3 <- ggplot(primary_school,
                aes(x = family_workers, y = primary_school,
                    color = gdp_capita)) +
  geom_point(size = 2, alpha = 0.7) +
  scale_color_gradient(low = "red", high = "blue") +
  labs(x = "Number of Workers in Family",
       y = "Educational Attainment (Primary School)",
       title = "Educational Attainment and Number of Workers in Family",
       color = "GDP per Capita")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot3)
```

## Timeline Graph 1 
```{r}
# Average attainment per continent and year, drawn as points joined by one
# line per continent.
primary_school %>%
  group_by(continent, year) %>%
  summarise(average = mean(primary_school)) %>%
  ggplot(aes(x = year, y = average, color = continent)) +
  geom_point() +
  geom_line() +
  xlab("Year") +
  ylab("Average Rate of Educational Attainment") +
  ggtitle("Educational Attainment over Time by Continent")
```


## Map of World Educational Attainment
```{r}
# One row per country present in the assembled data.
world <- distinct(primary_school, country)

# Outline data for the world map; its `region` column names each polygon.
map.world <- map_data("world")

# First layer: every region outlined in black on a white fill.
# Second layer: regions whose name matches a `country` value, drawn on top
# with the default styling.
ggplot(world) +
  geom_map(mapping = aes(map_id = region), data = map.world, map = map.world,
           fill = "white", color = "black") +
  geom_map(mapping = aes(map_id = country), map = map.world) +
  expand_limits(x = map.world$long, y = map.world$lat) + # scale for fill
  theme_map()

# Tile background covering (almost) the whole globe at the coarsest useful zoom.
# NOTE(review): the tile.stamen.com host may no longer serve these tiles --
# confirm get_stamenmap() still works with the installed ggmap version.
worldmap <- get_stamenmap(
  bbox = c(left = -180, bottom = -57, right = 179, top = 82.1),
  zoom = 2,
  maptype = "toner-background"
)

ggmap(worldmap)
```



```{r}
# Average child mortality in the 1990s for Niger and Costa Rica, highest first.
# `%in%` tests each row against both names; `==` would recycle the
# two-element vector down the rows and compare each row with only one of them.
primary_school %>%
  filter(decade == 1990) %>%
  group_by(decade, country) %>%
  summarize(average = mean(child_mortality)) %>%
  arrange(desc(average)) %>%
  filter(country %in% c("Niger", "Costa Rica"))
```



# Tables
## Mean Educational Attainment by Continent
```{r, warning=FALSE, echo=FALSE}
# Mean attainment per continent, largest mean first.
primary_school %>%
  group_by(continent) %>%
  summarise(primary_school_mean = mean(primary_school)) %>%
  arrange(-primary_school_mean)
```

```{r}
# Logical flag: TRUE where GDP per capita is above the 3955 cut-off.
primary_school <- mutate(primary_school, high_gdp = gdp_capita > 3955)
```

```{r}
# scatterplots to find interactions between variables
ggplot(primary_school, aes(x = gdp_capita, y = unemployment, color = high_gdp)) +
  geom_point()


# linear regression
# Each model is additive (terms joined with `+`, no interaction terms); the
# response `primary_school` is the column of that name inside the data frame.
library(broom)
tidy(lm(primary_school ~ high_gdp + unemployment, data = primary_school))

tidy(lm(primary_school ~ gdp_capita + unemployment + child_mortality,
        data = primary_school))

tidy(lm(primary_school ~ gdp_capita + unemployment + rural_pop,
        data = primary_school))

# an interaction effect will let one variable affect the other. Doesn't mean that the two variables are related
# coeff 0 -> no relationship
# fewer variables to fit b/c NA

# One row per country. The column is selected by name rather than by position,
# so the list stays correct if the column order of primary_school changes.
country_list <- primary_school %>%
  distinct(country)

save(country_list, file = "country_list.Rda")
  
```

```{r}

## Map of World Educational Attainment
# Countries with data in 2014 or 2015. `year` is numeric, so it is matched
# against numbers; `%in%` tests every row against both years, whereas `==`
# would recycle the two-element vector down the rows.
world <- primary_school %>%
  filter(year %in% c(2014, 2015)) %>%
  distinct(country)

# Outline data for the world map; its `region` column names each polygon.
map.world <- map_data("world")

# First layer: every region outlined in black on a white fill.
# Second layer: regions whose name matches a `country` value, drawn on top.
world %>%
  ggplot() +
  geom_map(data = map.world, map = map.world,
           aes(map_id = region), fill = "white", color = "black") +
  geom_map(aes(map_id = country), map = map.world) +
  expand_limits(x = map.world$long, y = map.world$lat) + # scale for fill
  theme_map()

# Tile background for the same extent.
# NOTE(review): the tile.stamen.com host may no longer serve these tiles --
# confirm get_stamenmap() still works with the installed ggmap version.
worldmap <- get_stamenmap(
  bbox = c(left = -180, bottom = -57, right = 179, top = 82.1),
  maptype = "toner-background",
  zoom = 2
)

ggmap(worldmap)
```

```{r}
# Attainment over time, jittered, with one panel per high_gdp value and a
# smoothed trend (no confidence band) in each panel.
ggplot(primary_school, aes(x = year, y = primary_school, color = high_gdp)) +
  geom_jitter() +
  facet_wrap(vars(high_gdp)) +
  geom_smooth(se = FALSE)
```

```{r}
# Scatter of attainment against GDP per capita, coloured by the high_gdp flag,
# with one straight-line fit (no confidence band) per flag value.
# The title names the GDP grouping because colour is mapped to high_gdp, not
# to continent.
plot <- ggplot(primary_school,
               aes(x = gdp_capita, y = primary_school, color = high_gdp)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "GDP per capita", y = "Educational Attainment (Primary School)",
       title = "Educational Attainment and GDP per Capita by GDP Group")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(plot)

```

## Schooling Cost Graph

```{r}
# Attainment against schooling cost, coloured by the high_gdp flag, with one
# straight-line fit (no confidence band) per flag value.
primary_school %>%
  ggplot(aes(x = schooling_cost, y = primary_school, color = high_gdp)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
```

## Gini 

